/**************************************
This program generates estimates used in Figure 4

Regressions are state x 2-digit CIP x cohort x year postgrad 
Storing estimates in bias_scatter_bystate_cip.dta

Author: Andrew Foote
Date: 11/19/2020


**************************************/
#delimit ;
set more off ;

/* Project-level settings; these files are expected to supply the
   $datadir and $supportdir globals used in this program
   (NOTE(review): confirm against config.do / drbvars.do). */
include "./config.do" ;
include "./drbvars.do" ;

/* Load the long-format earnings file and print basic diagnostics. */
use "$datadir/all_earnings_long.dta", clear ;

describe ;

tabulate year_grad ;

/*************************************
Sample restrictions come first: keep only
people who aren't missing demographics
(applied in restrictions.do), then bring in
the top_school definitions from top_school.do.
*************************************/
include "./restrictions.do" ;

include "./top_school.do" ;

/* Demographic controls entered in every regression below. */
local demogs "male white hispanic black asian " ;

/* "missing" flags records with zero in-state earnings, so that the
   definition matches what researchers have. */
generate missing = (instate_earnings == 0) ;

/*************************************
Cell-by-cell estimates of the top_school coefficient.

A cell is ui_state x cip_2dig x year_grad x years since graduation
(year - year_grad). For every non-empty cell three regressions of the
outcome on top_school and the demographic controls are run, with
standard errors clustered on pik:
    log_earn_instate   -> beta_in,  in_N
    log_earn_national  -> beta_out, out_N
    missing            -> missing_diff
bias = beta_out - beta_in. One row per non-empty cell is posted to
bias_scatter_bystate_cip.dta in $supportdir.

A regression that stops with an error posts missing (.) for its
coefficient and N. Success is judged from the return code of the
captured command itself, so the posted values can never come from
an earlier model's stored results.
*************************************/

/* Release the handle if an interrupted earlier run left it open;
   postfile refuses to reuse a handle name that is still in use. */
capture postclose bias_scatter ;

postfile bias_scatter beta_in in_N beta_out out_N  bias missing_diff 
                      str2 state str2 cip2 grad_year year_post 
                      using "$supportdir/bias_scatter_bystate_cip.dta", replace ;

tab ui_state top_school, m ;

/* cip_2dig is expected to exist already; the statement that used to
   build it here (gen cip_2dig = substr(degcip,1,2)) is disabled. */
drop if cip_2dig == "99" ;

tab opeid if top_school == . ; 
levelsof ui_state, local(states) ;
levelsof cip_2dig, local(fields) ;

foreach state in `states' { ;
   foreach cip in `fields' { ;
      forvalues cohort = 2001/2006 { ;
         forvalues ypg = 1/10 { ;

            /* Build the cell condition once so the count and all three
               regressions are guaranteed to use the same sample filter. */
            local cell `"ui_state == "`state'" & cip_2dig == "`cip'" & year_grad == `cohort' & year-year_grad == `ypg'"' ;

            count if `cell' ;
            /* Save the count right away; later commands overwrite r(). */
            local cell_N = r(N) ;

            di "============================================" ;
            di "STATE: `state'     " ;
            di "CIP: `cip'    " ;
            di "Year Grad: `cohort'    " ;
            di "Year Postgrad: `ypg'   " ;
            di "Observation counts: " `cell_N' ;
            di "============================================" ;

            if `cell_N' > 0 { ;

               /* NOTE(review): if top_school is dropped for collinearity
                  within a cell, _b[top_school] is presumably 0 rather
                  than missing - confirm whether such cells should be
                  posted as missing instead. */

               /* In-state earnings. _rc must be read immediately after
                  the captured command. */
               capture noisily reg log_earn_instate top_school `demogs' 
                                   if `cell', vce(cluster pik) ;
               if _rc == 0 { ;
                  local beta_in = _b[top_school] ;
                  local in_N    = e(N) ;
               } ;
               else { ;
                  local beta_in = . ;
                  local in_N    = . ;
               } ;

               /* National earnings. */
               capture noisily reg log_earn_national top_school `demogs' 
                                   if `cell', vce(cluster pik) ;
               if _rc == 0 { ;
                  local beta_out = _b[top_school] ;
                  local out_N    = e(N) ;
               } ;
               else { ;
                  local beta_out = . ;
                  local out_N    = . ;
               } ;

               /* Difference in the probability of zero in-state earnings. */
               capture noisily reg missing top_school `demogs' 
                                   if `cell', vce(cluster pik) ;
               if _rc == 0 { ;
                  local missing_diff = _b[top_school] ;
               } ;
               else { ;
                  local missing_diff = . ;
               } ;

               /* Missing propagates: bias is . if either estimate failed. */
               local bias = `beta_out' - `beta_in' ;

               post bias_scatter (`beta_in') (`in_N') (`beta_out') (`out_N') (`bias') 
                                 (`missing_diff') ("`state'") ("`cip'") (`cohort') (`ypg') ;
            } ;
         } ;
      } ;
   } ;
} ;

postclose bias_scatter ;

